Our dataset does not contain data for October, November and December of the year 2022. We are interested in the following main felonies: Burglary, Felony Assault, Grand Larceny, Grand Larceny of Motor Vehicle, Murder and Non-negligent Manslaughter, Rape, and Robbery.

Felony Frequency by Borough

complaint %>% 
  drop_na(borough, offense) %>% 
  group_by(borough) %>% 
  dplyr::summarize(n_obs = n()) %>% 
  ggplot(aes(x = reorder(borough, -n_obs), y = n_obs, fill = reorder(borough, -n_obs))) +
  geom_bar(stat = 'identity') +
  labs(
    title = "Frequency of Felonies by Borough (2016-2022)",
    x = "Borough",
    y = "Frequency"
  ) +
  theme(legend.position = "none")

Felony Frequency by Crime Type

complaint %>% 
  drop_na(offense) %>% 
  group_by(offense) %>% 
  dplyr::summarize(n_obs = n()) %>% 
  mutate(offense = reorder(offense, n_obs)) %>% 
  ggplot(aes(x = offense, y = n_obs, fill = offense)) +
  geom_bar(stat = 'identity') +
  labs(
    title = "Frequency of Felonies (2016-2022)",
    x = "Felony",
    y = "Frequency"
  ) +
  coord_flip() +
  theme(legend.position = "none")

Trend of Felony Frequency

complaint %>% 
  filter(year != 2022) %>% 
  drop_na(offense) %>% 
  group_by(offense, year) %>% 
  dplyr::summarize(n_obs = n()) %>% 
  ggplot(aes(x = year, y = n_obs, fill = offense)) +
  geom_bar(stat = 'identity') +
  labs(
    title = "Frequency of 7 Felonies by Year (2016-2021)",
    x = "Year",
    y = "Frequency",
    fill = "Felony"
  ) +
  theme(legend.position = "bottom") +
  guides(fill = guide_legend(nrow = 4,byrow = TRUE))

Trend of Proportion of Felony Frequency by Demographics of Victim

Sex

complaint %>% 
  drop_na(offense, vic_sex) %>% 
  group_by(year, vic_sex) %>% 
  dplyr::summarize(n_obs = n()) %>% 
  group_by(year) %>% 
  dplyr::summarize(vic_sex, percentage = n_obs / sum(n_obs)) %>% 
  ggplot(aes(x = year, y = percentage, fill = vic_sex)) +
  geom_bar(stat = 'identity') +
  labs(
    x = "Year",
    y = "Proportion",
    title = "Proportions of Felonies by Victim Sex and Year",
    fill = "Victim Sex"
  )

Age Group

complaint %>% 
  drop_na(offense, vic_age_group) %>% 
  group_by(year, vic_age_group) %>% 
  dplyr::summarize(n_obs = n()) %>% 
  group_by(year) %>% 
  dplyr::summarize(vic_age_group, percentage = n_obs / sum(n_obs)) %>% 
  ggplot(aes(x = year, y = percentage, fill = vic_age_group)) +
  geom_bar(stat = 'identity') +
  labs(
    x = "Year",
    y = "Proportion",
    title = "Proportions of Felonies by Victim Age Group and Year",
    fill = "Victim Age Group"
  )

Race

complaint %>% 
  drop_na(offense, vic_race) %>% 
  group_by(year, vic_race) %>% 
  dplyr::summarize(n_obs = n()) %>% 
  group_by(year) %>% 
  dplyr::summarize(vic_race, percentage = n_obs / sum(n_obs)) %>% 
  ggplot(aes(x = year, y = percentage, fill = vic_race)) +
  geom_bar(stat = 'identity') +
  labs(
    x = "Year",
    y = "Proportion",
    title = "Proportions of Felonies by Victim Race and Year",
    fill = "Victim Race"
  )

Month, Day of the Week, Time

Average Daily Felony Frequency by Year and Month

num_days = function(month, year) {
  
  year = as.integer(year)
  months = 1:12
  names(months) = month.abb
  month = months[month]
  
  as.numeric(strftime(as.Date(paste(year + month %/% 12, month %% 12 + 1, "01", sep = "-")) - 1, "%d"))
  
}

complaint %>% 
  drop_na(offense) %>% 
  mutate(year = fct_rev(year)) %>% 
  group_by(year, month) %>% 
  dplyr::summarize(mean_freq = n() / num_days(month, year)) %>% 
  plot_ly(
    x = ~month, y = ~year, z = ~mean_freq,
    type = "heatmap"
  ) %>% 
  colorbar(x = 1, y = 1) %>% 
  layout(
    title = "Average Daily Felony Frequency by Year and Month",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Year")
  )

Average Hourly Felony Frequency by Time of the Week

complaint %>% 
  drop_na(offense) %>% 
  filter(hour != "00" | minute != "01") %>% 
  mutate(day_of_week = fct_rev(day_of_week)) %>% 
  group_by(hour, day_of_week) %>% 
  dplyr::summarize(mean_freq = n() / 352) %>% 
  plot_ly(
    x = ~hour, y = ~day_of_week, z = ~mean_freq,
    type = "heatmap"
  ) %>% 
  colorbar(x = 1, y = 1) %>% 
  layout(
    title = "Average Hourly Felony Frequency by Time of the Week",
    xaxis = list(title = "Hour of the Day"),
    yaxis = list(title = "Day of the Week")
  )